import os
import pandas as pd
import csv
import numpy as np
import math
from matplotlib.lines import Line2D
import matplotlib as mpl
from matplotlib.patches import Patch
import matplotlib.pyplot as plt
import random
from scipy.spatial import distance
from scipy import stats
from sklearn import manifold
from sklearn.decomposition import PCA
import statsmodels.stats.multitest as sm
from mpl_toolkits.axes_grid1 import make_axes_locatable

def get_cols(num):
    colormap_20, colormap_40b, colormap_40c = mpl.cm.get_cmap('tab20', 256), mpl.cm.get_cmap('tab20b', 256), mpl.cm.get_cmap('tab20c', 256)
    norm, norm2 = mpl.colors.Normalize(vmin=0, vmax=19), mpl.colors.Normalize(vmin=20, vmax=39)
    m1, m2, m3 = mpl.cm.ScalarMappable(norm=norm, cmap=colormap_20), mpl.cm.ScalarMappable(norm=norm, cmap=colormap_40b), mpl.cm.ScalarMappable(norm=norm2, cmap=colormap_40c)
    colors_20 = [m1.to_rgba(a) for a in range(20)]
    colors_40 = [m2.to_rgba(a) for a in range(20)]+[m3.to_rgba(a) for a in range(20,40)]
    if num < 21: return colors_20
    elif num < 41: return colors_40
    else: return colors_40+colors_40+colors_40

Here I have several sections showing an exploration of genes for PET degradation present in the PICRUSt2 predicted metagenomes.

Summary

Genes included

Summary of the genes that we are looking for in the predicted metagenomes.

cols = ['KEGG ortholog', 'Product', 'Added to PICRUSt2']
genes = [['K21104', 'Poly(ethylene terephthalate) hydrolase (PETase)', 'HMM'], ['K18074', 'Terephthalate 1,2-dioxygenase oxygenase component (alpha subunit; tphA2)', 'HMM'], ['K18075', 'Terephthalate 1,2-dioxygenase oxygenase component (alpha subunit; tphA3)', 'HMM'], ['K18076', '1,2-dihydroxy-3,5-cyclohexadiene-1,4-dicarboxylate dehydrogenase (tphB)', 'HMM'], ['K00448', 'Protocatechuate 3,4-dioxygenase (alpha subunit; pcaG)', 'Default'], ['K00449', 'Protocatechuate 3,4- dioxygenase (beta subunit; pcaH)', 'Default'], ['K01857', '3-carboxy-cis,cis-muconate cycloisomerase (pcaB)', 'Default'], ['K01607', '4-carboxymuconolactone decarboxylase (pcaC)', 'Default'], ['K14727', '3-oxoadipate enol-lactonase / 4-carboxymuconolactone decarboxylase (pcaL)', 'Default'], ['K01055', '3-oxoadipate enol-lactonase (pcaD)', 'Default'], ['K01031', '3-oxoadipate CoA-transferase (alpha subunit; pcaI)', 'Default'], ['K01032', '3-oxoadipate CoA-transferase (beta subunit; pcaJ)', 'Default']]
genes_df = pd.DataFrame(genes, columns=cols)
#genes_df = genes_df.set_index('KEGG ortholog')
py$genes_df %>%
  kable() %>%
  kable_styling()
KEGG ortholog Product Added to PICRUSt2
K21104 Poly(ethylene terephthalate) hydrolase (PETase) HMM
K18074 Terephthalate 1,2-dioxygenase oxygenase component (alpha subunit; tphA2) HMM
K18074 Terephthalate 1,2-dioxygenase oxygenase component (alpha subunit; tphA3) HMM
K18076 1,2-dihydroxy-3,5-cyclohexadiene-1,4-dicarboxylate dehydrogenase (tphB) HMM
K00448 Protocatechuate 3,4-dioxygenase (alpha subunit; pcaG) Default
K00449 Protocatechuate 3,4- dioxygenase (beta subunit; pcaH) Default
K01857 3-carboxy-cis,cis-muconate cycloisomerase (pcaB) Default
K01607 4-carboxymuconolactone decarboxylase (pcaC) Default
K14727 3-oxoadipate enol-lactonase / 4-carboxymuconolactone decarboxylase (pcaL) Default
K01055 3-oxoadipate enol-lactonase (pcaD) Default
K01031 3-oxoadipate CoA-transferase (alpha subunit; pcaI) Default
K01032 3-oxoadipate CoA-transferase (beta subunit; pcaJ) Default
genes_df = genes_df.set_index('KEGG ortholog')

PETases predicted

Look at the ASVs predicted to contain PETases and their taxonomic classifications. Note that many of these ASVs are currently unclassified at even the phylum level.

petases = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_PETases.csv', header=None, index_col=0)
#print(petases.loc['ASV000020', :].values[0])
asvs = list(petases.index.values)
copies = list(petases.loc[:, 1])

abun = pd.read_csv('/Users/robynwright/Documents/OneDrive/PhD_Plastic_Oceans/Experiments/PET_MiSeq2/basic/Treatment1_All_percent.csv', index_col=0, header=0)

taxonomy = pd.read_csv('/Users/robynwright/Documents/OneDrive/PhD_Plastic_Oceans/Experiments/PET_MiSeq2/basic/Taxonomy.csv', header=0, index_col=0)
taxonomy = taxonomy.loc[asvs, :]
labels = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
maxs, maxs_trt = [], []
for asv in asvs:
  this_asv = taxonomy.loc[asv, :].values
  for b in range(len(this_asv)):
    if not isinstance(this_asv[b], str):
      if b == 6 and 'Unclassified' not in this_asv[b-1]:
        new = this_asv[b-1]+' sp.'
      elif 'Unclassified' not in this_asv[b-1]:
        new = 'Unclassified '+this_asv[b-1]
      else:
        new = this_asv[b-1]
      taxonomy.loc[asv, labels[b]] = new
    elif b == 6 and isinstance(this_asv[b], str):
      taxonomy.loc[asv, labels[b]] = this_asb[b-1]+' '+this_asv[b]
  maximum = abun.loc[asv, :].max(axis=0)
  maximum_id = abun.loc[asv, :].idxmax(axis=1)
  maxs.append(maximum), maxs_trt.append(maximum_id)
taxonomy.drop(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus'], axis=1, inplace=True)
taxonomy['PETase copies'] = copies
taxonomy['Maximum relative abundance (%)'] = maxs
taxonomy['Treatment'] = maxs_trt
#taxonomy = taxonomy.sort_values(['Class', 'Order', 'Family', 'Genus', 'Species'])
taxonomy = taxonomy.sort_values(['Maximum relative abundance (%)'], ascending=False)
py$taxonomy %>%
  kable() %>%
  kable_styling()
Species PETase copies Maximum relative abundance (%) Treatment
ASV000020 Pseudomonas sp. 2 12.4684821 Day42PET2
ASV000043 Azomonas sp. 2 4.3419379 Day14WeatherPET1
ASV000098 Unclassified Gammaproteobacteria 2 1.1860303 Day14WeatherPET1
ASV000272 Pseudomonas sp. 2 0.8623088 Day42WeatherPET2
ASV000126 Azomonas sp. 2 0.5006771 Day14WeatherPET1
ASV000610 Unclassified Rhodobacteraceae 1 0.0762311 Inoc4
ASV000486 Unclassified Pseudonocardiaceae 2 0.0731886 Day21WeatherPET3
ASV000859 Azomonas sp. 2 0.0575374 Day30WeatherPET1
ASV000621 Unclassified Pseudomonadaceae 2 0.0533508 Day14WeatherPET1
ASV002089 Marinobacter sp. 2 0.0508259 Day07PET1
ASV001854 Azomonas sp. 2 0.0504286 Day42PET2
ASV002886 Azomonas sp. 2 0.0460299 Day30WeatherPET1
ASV002319 Azomonas sp. 2 0.0386274 Day21WeatherPET1
ASV002444 Unclassified Rhodobacteraceae 1 0.0376967 Inoc3
ASV008863 Unclassified Gammaproteobacteria 2 0.0365943 Day21WeatherPET3
ASV008860 Unclassified Pseudomonadaceae 2 0.0365943 Day21WeatherPET3
ASV009408 Unclassified Gammaproteobacteria 2 0.0363901 Day30PET3
ASV001422 Azomonas sp. 2 0.0359567 Day30WeatherPET3
ASV005194 Azomonas sp. 2 0.0345224 Day30WeatherPET1
ASV006460 Spongiispira sp. 1 0.0333556 Inoc1
ASV001694 Unclassified Rhodobacteraceae 1 0.0333556 Inoc1
ASV001672 Unclassified Pseudomonadaceae 2 0.0291015 Day07WeatherPET3
ASV005178 Unclassified Gammaproteobacteria 2 0.0288226 Day30PET1
ASV001181 Unclassified Pseudonocardiaceae 2 0.0288226 Day30PET1
ASV006452 Parvibaculum sp. 2 0.0285103 Day21PET2
ASV002594 Azomonas sp. 2 0.0278164 Day42WeatherPET2
ASV003071 Azomonas sp. 2 0.0257715 Day42WeatherPET3
ASV005905 Unclassified Gammaproteobacteria 2 0.0257715 Day42WeatherPET3
ASV005761 Unclassified Pseudomonadaceae 2 0.0252143 Day42PET2
ASV005789 Unclassified Pseudomonadaceae 2 0.0252143 Day42PET2
ASV009940 Azomonas sp. 2 0.0252143 Day42PET2
ASV009965 Azomonas sp. 2 0.0252143 Day42PET2
ASV009960 Azomonas sp. 2 0.0252143 Day42PET2
ASV009947 Unclassified Alphaproteobacteria 2 0.0252143 Day42PET2
ASV003868 Unclassified Micromonosporaceae 1 0.0248433 Day01ExtractionControl
ASV003774 Azomonas sp. 2 0.0243962 Day21WeatherPET3
ASV005219 Unclassified Cellvibrionales 2 0.0243962 Day21WeatherPET3
ASV004312 Oleibacter sp. 2 0.0243843 Day01LowCrys1
ASV002442 Azomonas sp. 2 0.0242028 Day30WeatherPET2
ASV005769 Azomonas sp. 2 0.0242028 Day30WeatherPET2
ASV008825 Unclassified Pseudonocardiaceae 2 0.0236202 Day21PET3
ASV009426 Azomonas sp. 2 0.0230150 Day30WeatherPET1
ASV009448 Unclassified Rhodobacteraceae 2 0.0230150 Day30WeatherPET1
ASV002887 Azomonas sp. 2 0.0222255 Day14PET2
ASV003511 Unclassified Proteobacteria 2 0.0218261 Day07WeatherPET3
ASV008802 Unclassified Gammaproteobacteria 2 0.0213828 Day21PET2
ASV008806 Unclassified Stappiaceae 2 0.0213828 Day21PET2
ASV001853 Azomonas sp. 2 0.0205196 Day14WeatherPET1
ASV007101 Azomonas sp. 2 0.0193287 Day42WeatherPET3
ASV002893 Azomonas sp. 2 0.0193137 Day21WeatherPET1
ASV003151 Aliisedimentitalea sp. 1 0.0188484 Inoc3
ASV014304 Unclassified Gammaproteobacteria 2 0.0181951 Day30PET3
ASV014303 Azomonas sp. 2 0.0181951 Day30PET3
ASV014290 Azomonas sp. 2 0.0181951 Day30PET3
ASV014297 Azomonas sp. 2 0.0181951 Day30PET3
ASV006801 Unclassified Gammaproteobacteria 2 0.0181521 Day30WeatherPET2
ASV008182 Azomonas sp. 2 0.0181324 Day07WeatherPET2
ASV006758 Unclassified Gammaproteobacteria 2 0.0178010 Day30PET2
ASV004559 Aliisedimentitalea sp. 1 0.0170692 Day01NoC3
ASV003951 Unclassified Rhodobacteraceae 1 0.0168095 Day30LowCrysWater3
ASV009972 Unclassified Gammaproteobacteria 2 0.0163159 Day42PET3
ASV013647 Unclassified Gammaproteobacteria 2 0.0157468 Day21PET3
ASV013603 Azomonas sp. 2 0.0157468 Day21PET3
ASV008855 Unclassified Gammaproteobacteria 2 0.0154544 Day21WeatherPET2
ASV008847 Azomonas sp. 2 0.0154544 Day21WeatherPET2
ASV004316 Unclassified Frankiales 1 0.0153633 Day14ExtractionControl
ASV004030 Unclassified Rhodobacteraceae 1 0.0152462 Inoc4
ASV002737 Unclassified Pseudonocardiaceae 2 0.0148170 Day14PET2
ASV012702 Unclassified Gammaproteobacteria 2 0.0148170 Day14PET2
ASV012666 Unclassified Rhodobacteraceae 2 0.0148170 Day14PET2
ASV012665 Unclassified Gammaproteobacteria 2 0.0148170 Day14PET2
ASV012068 Unclassified Cellvibrionales 2 0.0145507 Day07WeatherPET3
ASV012061 Azomonas sp. 2 0.0145507 Day07WeatherPET3
ASV012082 Azomonas sp. 2 0.0145507 Day07WeatherPET3
ASV005294 Azomonas sp. 2 0.0145507 Day07WeatherPET3
ASV012056 Unclassified Gammaproteobacteria 2 0.0145507 Day07WeatherPET3
ASV004738 Unclassified Gammaproteobacteria 2 0.0145507 Day07WeatherPET3
ASV009119 Unclassified Pseudonocardiaceae 2 0.0144760 Day30NoC2
ASV013553 Azomonas sp. 2 0.0142552 Day21PET2
ASV013564 Unclassified Gammaproteobacteria 2 0.0142552 Day21PET2
ASV013593 Unclassified Gammaproteobacteria 2 0.0142552 Day21PET2
ASV009988 Azomonas sp. 2 0.0139198 Day42WeatherPET1
ASV009994 Unclassified Gammaproteobacteria 2 0.0139198 Day42WeatherPET1
ASV004344 Azomonas sp. 2 0.0139198 Day42WeatherPET1
ASV010004 Azomonas sp. 2 0.0139198 Day42WeatherPET1
ASV005304 Unclassified Gammaproteobacteria 2 0.0139082 Day42WeatherPET2
ASV006826 Azomonas sp. 2 0.0134838 Day30WeatherPET3
ASV012910 Azomonas sp. 2 0.0131087 Day14WeatherPET3
ASV012904 Unclassified Gammaproteobacteria 2 0.0131087 Day14WeatherPET3
ASV012585 Azomonas sp. 2 0.0128916 Day14PET1
ASV012633 Azomonas sp. 2 0.0128916 Day14PET1
ASV010052 Unclassified Proteobacteria 2 0.0128858 Day42WeatherPET3
ASV010061 Unclassified Gammaproteobacteria 2 0.0128858 Day42WeatherPET3
ASV013707 Unclassified Proteobacteria 2 0.0128758 Day21WeatherPET1
ASV013670 Unclassified Gammaproteobacteria 2 0.0128758 Day21WeatherPET1
ASV013658 Unclassified Alphaproteobacteria 2 0.0128758 Day21WeatherPET1
ASV004307 Unclassified Rhodobacteraceae 1 0.0126167 Day14LowCrysWater2
ASV014756 Unclassified Pseudoalteromonadaceae 2 0.0126072 Day42PET2
ASV008485 Unclassified Gammaproteobacteria 2 0.0123117 Day14WeatherPET1
ASV018037 Azomonas sp. 2 0.0121981 Day21WeatherPET3
ASV018061 Unclassified Gammaproteobacteria 2 0.0121981 Day21WeatherPET3
ASV018079 Azomonas sp. 2 0.0121981 Day21WeatherPET3
ASV004730 Unclassified Rhizobiaceae 2 0.0121014 Day30WeatherPET2
ASV009483 Unclassified Cellvibrionales 2 0.0121014 Day30WeatherPET2
ASV009377 Azomonas sp. 2 0.0118673 Day30PET2
ASV009381 Azomonas sp. 2 0.0118673 Day30PET2
ASV013508 Unclassified Rhodobacteraceae 2 0.0118645 Day21PET1
ASV013503 Unclassified Stappiaceae 2 0.0118645 Day21PET1
ASV013542 Azomonas sp. 2 0.0118645 Day21PET1
ASV010100 Unclassified Pseudomonadaceae 2 0.0116925 Day42BHET1
ASV014338 Unclassified Vibrionaceae 2 0.0115075 Day30WeatherPET1
ASV014331 Unclassified Rhodobacteraceae 2 0.0115075 Day30WeatherPET1
ASV007925 Unclassified Bacillales 1 0.0112360 Day07NoC2
ASV011883 Unclassified Cellvibrionales 2 0.0112070 Day07PET2
ASV004106 Unclassified Rhodobacteraceae 1 0.0106712 Day01WeatherPET2
ASV013716 Azomonas sp. 2 0.0103029 Day21WeatherPET2
ASV013745 Unclassified Pseudomonadaceae 2 0.0103029 Day21WeatherPET2
ASV013760 Maliponia sp. 1 0.0103029 Day21WeatherPET2
ASV010979 Unclassified Bacillales 1 0.0100852 Day03PET1
ASV006256 Unclassified Rhodobacteraceae 1 0.0099015 Day21LowCrysWater2
ASV009555 Unclassified Gammaproteobacteria 2 0.0089892 Day30WeatherPET3
ASV016970 Unclassified Rhodobacteraceae 1 0.0085346 Day01NoC3
ASV017089 Unclassified Gammaproteobacteria 2 0.0083472 Day14PET3
ASV012771 Pseudoroseicyclus sp. 2 0.0082078 Day14WeatherPET1
ASV012788 Unclassified Gammaproteobacteria 2 0.0082078 Day14WeatherPET1
ASV017868 Unclassified Gammaproteobacteria 2 0.0078734 Day21PET3
ASV017895 Azomonas sp. 2 0.0078734 Day21PET3
ASV017882 Unclassified Gammaproteobacteria 1 0.0078734 Day21PET3
ASV011523 Azomonas sp. 2 0.0074906 Day07NoC2
ASV010556 Unclassified Rhodobacteraceae 1 0.0074708 Day01BHET3
ASV017843 Unclassified Proteobacteria 2 0.0071276 Day21PET2
ASV017831 Azomonas sp. 2 0.0071276 Day21PET2
ASV017817 Unclassified Gammaproteobacteria 2 0.0071276 Day21PET2
ASV017821 Unclassified Pseudomonadaceae 2 0.0071276 Day21PET2
ASV017822 Unclassified Vibrionaceae 2 0.0071276 Day21PET2
ASV017829 Unclassified Gammaproteobacteria 2 0.0071276 Day21PET2
ASV014823 Unclassified Gammaproteobacteria 2 0.0069541 Day42WeatherPET2
ASV014805 Unclassified Oceanospirillales 2 0.0069541 Day42WeatherPET2
ASV016099 Unclassified Rhodobacteraceae 1 0.0067650 Day07NoC3
ASV017219 Unclassified Gammaproteobacteria 2 0.0065544 Day14WeatherPET3
ASV017001 Unclassified Gammaproteobacteria 2 0.0064458 Day14PET1
ASV017987 Azomonas sp. 2 0.0064379 Day21WeatherPET1
ASV014361 Unclassified Gammaproteobacteria 2 0.0060507 Day30WeatherPET2
ASV014246 Unclassified Gammaproteobacteria 2 0.0059337 Day30PET2
ASV017766 Unclassified Rhodobacteraceae 2 0.0059323 Day21PET1
ASV017793 Azomonas sp. 2 0.0059323 Day21PET1
ASV014860 Unclassified Rhodobacteraceae 1 0.0058462 Day42BHET1
ASV016459 Azomonas sp. 2 0.0057389 Day07WeatherPET1
ASV016415 Azomonas sp. 2 0.0051883 Day07PET3
ASV017177 Unclassified Gammaproteobacteria 2 0.0046882 Day14WeatherPET2
ASV017151 Pseudomonas sp. 2 0.0046882 Day14WeatherPET2
ASV013926 Azomonas sp. 2 0.0045037 Day01LowCrysWater3
ASV013924 Unclassified Oceanospirillales 1 0.0045037 Day01LowCrysWater3
ASV010710 Unclassified Rhodobacteraceae 1 0.0041789 Day03NoC3
ASV017122 Unclassified Bacteria 2 0.0041039 Day14WeatherPET1
ASV016936 Unclassified Pseudomonadaceae 2 0.0040900 Day14LowCrys1
sample_order = ['Day00Inoc', 'Day01NoC', 'Day03NoC', 'Day07NoC', 'Day14NoC', 'Day21NoC', 'Day30NoC', 'Day42NoC', 'Day01LowCrysWater', 'Day03LowCrysWater', 'Day07LowCrysWater', 'Day14LowCrysWater', 'Day21LowCrysWater', 'Day30LowCrysWater', 'Day42LowCrysWater', 'Day01LowCrys', 'Day03LowCrys', 'Day07LowCrys', 'Day14LowCrys', 'Day21LowCrys', 'Day30LowCrys', 'Day42LowCrys', 'Day01PET', 'Day03PET', 'Day07PET', 'Day14PET', 'Day21PET', 'Day30PET', 'Day42PET', 'Day01WeatherPET', 'Day03WeatherPET', 'Day07WeatherPET', 'Day14WeatherPET', 'Day21WeatherPET', 'Day30WeatherPET', 'Day42WeatherPET', 'Day01BHET', 'Day03BHET', 'Day07BHET', 'Day14BHET', 'Day21BHET', 'Day30BHET', 'Day42BHET']
functions_keeping = ['PETase', 'tphA2', 'tphA3', 'tphB', 'K18074', 'K18075', 'K18076', 'K00448', 'K00449', 'K01857', 'K01607', 'K14727', 'K01055', 'K01031', 'K01032']
colors = ['k', 'y', 'r', 'm', 'b', 'g', 'orange']
labels = ['Inoculum', 'No carbon', 'Amorphous PET\nplanktonic', 'Amorphous PET\nbiofilm', 'PET powder', 'Weathered PET powder', 'BHET']
trts = ['Inoc', 'NoC', 'LowCrysWater', 'LowCrys', 'PET', 'WeatherPET', 'BHET']

picrust_unstrat = pd.read_csv('//Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/pred_metagenome_unstrat.tsv', index_col=0, header=0, sep='\t')
picrust_unstrat_all = pd.DataFrame(picrust_unstrat)
picrust_unstrat = picrust_unstrat.loc[functions_keeping, :]
cols = list(picrust_unstrat.columns)
rename = {}
dropping = []
for col in cols:
  for sam in sample_order:
    if sam == col[:-1]:
      rename[col] = sam
    elif 'Inoc' in col:
      rename[col] = 'Day00Inoc'
  if 'Control' in col:
    dropping.append(col)
picrust_unstrat = picrust_unstrat.rename(columns=rename)
picrust_unstrat = picrust_unstrat.drop(dropping, axis=1).loc[:, sample_order]
handles = []
x = [0, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48]
alpha = [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
for c in range(len(colors)):
  handles.append(Patch(facecolor=colors[c], edgecolor='k', label=labels[c]))
day, alph = ['Inoculum', 'Day 1', 'Day 3', 'Day 7', 'Day 14', 'Day 21', 'Day 30', 'Day 42'], [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
handles.append(Patch(facecolor='w', edgecolor='w', label=''))
for d in range(len(day)):
  handles.append(Patch(facecolor='k', edgecolor='k', label=day[d], alpha=alph[d]))
def plot_picrust_raw(function):
  plt.figure(figsize=(10,4))
  ax1 = plt.subplot(121)
  ax2 = plt.subplot(122)
  count = 0
  for sample in sample_order:
    mean = picrust_unstrat.loc[function, sample].mean()
    std = picrust_unstrat.loc[function, sample].std()
    for a in range(len(trts)):
      if trts[a] in sample:
        if 'Water' in sample and 'Water' not in trts[a]:
          continue
        color, label = colors[a], labels[a]
    ax1.bar(x[count], mean, yerr=std, color=color, edgecolor='k', alpha=alpha[count], capsize=1)
    ax2.bar(x[count], mean, yerr=std, color=color, edgecolor='k', alpha=alpha[count], capsize=1)
    count += 1
  ax2.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), fontsize=8)
  ax1.set_title(function), ax2.set_title(function)
  ax2.semilogy()
  ax1.set_ylabel('Proportion of population (%)')
  plt.sca(ax1)
  plt.ylim(ymin=0)
  plt.tight_layout()

Raw PICRUSt2 abundance plots

For each of these, the left and right plots are identical but the right plot is plotted on a log scale.

PETase

plot_picrust_raw('PETase')
plt.show()

tphA2

plot_picrust_raw('tphA2')
plt.show()

tphA3

plot_picrust_raw('tphA3')
plt.show()

tphB

plot_picrust_raw('tphB')
plt.show()

K18074

plot_picrust_raw('K18074')
plt.show()

K18075

plot_picrust_raw('K18075')
plt.show()

K18076

plot_picrust_raw('K18076')
plt.show()

K00448

plot_picrust_raw('K00448')
plt.show()

K00449

plot_picrust_raw('K00449')
plt.show()

K01857

plot_picrust_raw('K01857')
plt.show()

K01607

plot_picrust_raw('K01607')
plt.show()

K14727

plot_picrust_raw('K14727')
plt.show()

K01055

plot_picrust_raw('K01055')
plt.show()

K01031

plot_picrust_raw('K01031')
plt.show()

K01032

plot_picrust_raw('K01032')
plt.show()

Fold change with no carbon control

Now I am plotting the fold change (calculated as log2, but converted back to absolute) for each K0 compared with the mean value for the no carbon control (i.e. the mean of all replicates for all days) .

picrust_unstrat_log = picrust_unstrat.replace(to_replace=0, value=0.00001)
#math.log2(num)
#rename = {}
#for col in list(picrust_unstrat.columns):
#  if 'NoC' in col:
#    rename[col] = 'NoC'
#picrust_unstrat_log.rename(columns=rename, inplace=True)
def get_fc(function):
  ma, mi = 0, 0
  this_abun = pd.DataFrame(picrust_unstrat_log.loc[function, :])
  x, y = [0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, ], [0, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
  plt.figure(figsize=(6,3))
  ax1 = plt.subplot(111)
  all_nc = []
  days = ['Day01', 'Day03', 'Day07', 'Day14', 'Day21', 'Day30', 'Day42']
  for day in days:
    this_day = this_abun.loc[day+'NoC', :].mean()
    this_day = math.log2(this_day)
    all_nc.append(this_day)
  """
  nc = this_abun.loc['NoC', :].mean()
  nc = math.log2(nc)
  """
  new_order = []
  for sample in sample_order:
    if 'NoC' not in sample:
      new_order.append(sample)
  colormap, norm = mpl.cm.get_cmap('RdBu_r', 256), mpl.colors.Normalize(vmin=-10, vmax=10)
  m = mpl.cm.ScalarMappable(norm=norm, cmap=colormap)
  ax1 = plt.subplot2grid((7,16), (1,1), rowspan=5, colspan=7)
  axinoc = plt.subplot2grid((7,16), (3,0))
  axcolbar = plt.subplot2grid((13,16), (0,1), colspan=7, frameon=False)
  ax2 = plt.subplot2grid((7,16), (0,8), colspan=7, frameon=False)
  plt.sca(ax2)
  plt.xticks([]), plt.yticks([])
  plt.sca(axcolbar)
  plt.xticks([]), plt.yticks([])
  for a in range(len(new_order)):
    this_mean_abs = this_abun.loc[new_order[a], :].mean()
    for b in range(len(days)):
      if days[b] in new_order[a]:
        nc = all_nc[b]
      elif 'Day00' in new_order[a]:
        nc = np.mean(all_nc)
    this_mean = math.log2(this_mean_abs)
    this_diff = math.pow(this_mean-nc, 2)
    if this_diff < 1 and this_diff != 0:
      this_diff = -1/this_diff
    #print(new_order[a], float(nc), float(this_mean_abs), float(this_diff), '\n')
    color = m.to_rgba(this_diff)
    if this_diff > ma: ma = this_diff
    if this_diff < mi: mi = this_diff
    if x[a] == 0 and y[a] == 0:
      axinoc.bar([1], [1], color=color, edgecolor='k', width=1)
      axinoc.set_xlim([0.5, 1.5]), axinoc.set_ylim([0,1])
    else:
      ax1.bar([x[a]], [1], bottom=[y[a]], color=color, edgecolor='k', width=1)
  plt.sca(ax1)
  plt.xticks([1, 2, 3, 4, 5, 6, 7], ['1', '3', '7', '14', '21', '30', '42'])
  plt.xlabel('Day')
  plt.yticks([0.5, 1.5, 2.5, 3.5, 4.5, 5.5], ['BHET', 'Weathered\nPET powder', 'PET powder', 'Amorphous PET\nBiofilm', 'Amorphous PET\nPlanktonic'])
  ax1.yaxis.set_ticks_position('right')
  plt.sca(axinoc)
  plt.xticks([1], ['Inoculum'], rotation=90)
  plt.yticks([])
  ax1.set_xlim([0.5, 7.5]), ax1.set_ylim([0, 5])
  cb1 = mpl.colorbar.ColorbarBase(axcolbar, cmap=colormap, norm=norm, orientation='horizontal')
  cb1.set_ticks([])
  axcolbar.text(-12, -25, '<-10')
  axcolbar.text(9, -25, '>10')
  axcolbar.text(0, 0, 'Fold change', ha='center', va='center')
  plt.tight_layout()

PETase

get_fc('PETase')
plt.tight_layout()
plt.show()

tphA2

get_fc('tphA2')
plt.tight_layout()
plt.show()

tphA3

get_fc('tphA3')
plt.tight_layout()
plt.show()

tphB

get_fc('tphB')
plt.tight_layout()
plt.show()

K18074

get_fc('K18074')
plt.tight_layout()
plt.show()

K18075

get_fc('K18075')
plt.tight_layout()
plt.show()

K18076

get_fc('K18076')
plt.tight_layout()
plt.show()

K00448

get_fc('K00448')
plt.tight_layout()
plt.show()

K00449

get_fc('K00449')
plt.tight_layout()
plt.show()

K01857

get_fc('K01857')
plt.tight_layout()
plt.show()

K01607

get_fc('K01607')
plt.tight_layout()
plt.show()

K14727

get_fc('K14727')
plt.tight_layout()
plt.show()

K01055

get_fc('K01055')
plt.tight_layout()
plt.show()

K01031

get_fc('K01031')
plt.tight_layout()
plt.show()

K01032

get_fc('K01032')
plt.tight_layout()
plt.show()

Contributions to key functions split by taxa

This first section is to filter the file very large (> 7 GB) default stratified PICRUSt2 output to only include the functions that we are interested in: PETase, tphA2, tphA3, tphB, K18074, K18075, K18076, K00448 and K00449. Note that for each of these, if the contributions of individual taxa were below 0.5% relative abundance, these have been grouped to ‘Other’. Taxonomic classifications shown are the lowest available for each. More than one bar for a taxonomic classification (e.g. as can be seen in K00448/K00449 for Thalassospira) indicates that more than one ASV is contributing to this genera.

key_functions = ['PETase', 'tphA2', 'tphA3', 'tphB', 'K18074', 'K18075', 'K18076', 'K00448', 'K00449']
functions = [[], [], [], [], [], [], [], [], []]
count = 0
with open('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/pred_metagenome_contrib.tsv') as f:
    for row in f:
        for fu in range(len(key_functions)):
            if key_functions[fu] in row:
                functions[fu].append(row)
cols = ['sample', 'function', 'taxon', 'taxon_abun', 'taxon_rel_abun', 'genome_function_count', 'taxon_function_abun', 'taxon_rel_function_abun']
count = 0
for func in functions:
    this_func = []
    for l in func:
        l = l.split('\t')
        for a in range(len(l)):
          l[a] = l[a].replace('\n', '')
        this_func.append(l)
    this_df = pd.DataFrame(this_func, columns=cols)
    this_df.to_csv('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/'+key_functions[count]+'.csv')
    count += 1
taxonomy = pd.read_csv('/Users/robynwright/Documents/OneDrive/PhD_Plastic_Oceans/Experiments/PET_MiSeq2/basic/Taxonomy.csv', header=0, index_col=0)
asvs = list(taxonomy.index.values)
labels = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
tax_dict = {}

for asv in asvs:
  this_asv = taxonomy.loc[asv, :].values
  new = ''
  for b in range(len(this_asv)):
    if not isinstance(this_asv[b], str):
      new = this_asv[b-1]
    elif b == 6 and isinstance(this_asv[b], str):
      new = this_asv[b-1]+' '+this_asv[b]
    if new != '':
      this_asv[b] = new
  tax_dict[asv] = this_asv
key_functions = ['PETase', 'tphA2', 'tphA3', 'tphB', 'K18074', 'K18075', 'K18076', 'K00448', 'K00449']
sample_order = ['Day00Inoc', 'Day01NoC', 'Day03NoC', 'Day07NoC', 'Day14NoC', 'Day21NoC', 'Day30NoC', 'Day42NoC', 'Day01LowCrysWater', 'Day03LowCrysWater', 'Day07LowCrysWater', 'Day14LowCrysWater', 'Day21LowCrysWater', 'Day30LowCrysWater', 'Day42LowCrysWater', 'Day01LowCrys', 'Day03LowCrys', 'Day07LowCrys', 'Day14LowCrys', 'Day21LowCrys', 'Day30LowCrys', 'Day42LowCrys', 'Day01PET', 'Day03PET', 'Day07PET', 'Day14PET', 'Day21PET', 'Day30PET', 'Day42PET', 'Day01WeatherPET', 'Day03WeatherPET', 'Day07WeatherPET', 'Day14WeatherPET', 'Day21WeatherPET', 'Day30WeatherPET', 'Day42WeatherPET', 'Day01BHET', 'Day03BHET', 'Day07BHET', 'Day14BHET', 'Day21BHET', 'Day30BHET', 'Day42BHET']
x = [0, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48]
def get_picrust_stratified_plot(function):
  plt.close()
  if function == 'monooxygenases' or function == 'dioxygenases':
    this_func = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/'+function+'.csv', header=0, index_col=0)
  else:
    this_func = pd.read_csv('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/'+function+'.csv', header=0, index_col=1)
  try:
    this_func.drop(['Unnamed: 0', 'function', 'taxon_abun', 'taxon_rel_abun', 'genome_function_count', 'taxon_rel_function_abun'], axis=1, inplace=True)
  except:
    do_nothing=True
  rename = {}
  all_samples = list(this_func.index.values)
  for samp in all_samples:
    for sam in sample_order:
      if sam == samp[:-1]:
        rename[samp] = sam
      elif 'Inoc' in samp:
        rename[samp] = 'Day00Inoc'
  this_func = this_func.rename(index=rename)
  print(this_func)
  plotting, all_asvs = [], []
  for sam in sample_order:
    if sam not in list(this_func.index.values): continue
    this_sample = pd.DataFrame(this_func.loc[sam, :])
    mi = 0.5
    if this_sample.shape[1] == 1:
      this_sample = this_sample.transpose()
    try:
      this_sample.set_index('taxon', inplace=True)
      if this_sample.shape[0] == 1:
        this_sample_above = this_sample
      else:
        this_sample = this_sample.groupby(by=this_sample.index, axis=0).mean()
        if this_sample.loc[:, 'taxon_function_abun'].sum() < 0.5:
          mi = 0.05
        this_sample_below = this_sample[this_sample['taxon_function_abun'] < 0.5]
        this_sample_above = this_sample[this_sample['taxon_function_abun'] >= 0.5]
        s_below = this_sample_below.loc[:, 'taxon_function_abun'].sum()
        this_sample_above.loc['Other'] = s_below
    except:
      print('Had an except')
      print(this_sample)
    plotting.append(this_sample_above)
    all_asvs += list(this_sample_above.index.values)
  genera, orders = [], []
  for asv in all_asvs:
    if asv == 'Other':
      genera.append('Other')
      orders.append('Other')
      continue
    genera.append(tax_dict[asv][-1])
    orders.append(tax_dict[asv][2])
  genera = sorted(list(set(genera)))
  colors = get_cols(len(genera))
  color_dict = {}
  handles = []
  for a in range(len(genera)):
    color_dict[genera[a]] = colors[a]
    handles.append(Patch(facecolor=colors[a], edgecolor='k', label=genera[a]))
  asv_col = {}
  for asv in all_asvs:
    if asv == 'Other':
      asv_col[asv] = color_dict['Other']
    else:
      asv_col[asv] = color_dict[tax_dict[asv][-1]]
  plt.figure(figsize=(10,4))
  ax1 = plt.subplot(111)
  for a in range(len(plotting)):
    this_plot = plotting[a]
    asvs = list(this_plot.index.values)
    bottom = 0
    for asv in asvs:
      this_num = this_plot.loc[asv, 'taxon_function_abun']
      ax1.bar(x[a], this_num, bottom=bottom, width=0.8, color=asv_col[asv], edgecolor='k')
      bottom += this_num
  fs = 10
  if len(handles) > 10:
    fs = 8
  cols = max([1, math.floor(len(handles)/10)])
  ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), ncol=cols, fontsize=fs)
  ymax = ax1.get_ylim()[1]/10
  labels = [1, 3, 7, 14, 21, 30, 42, 1, 3, 7, 14, 21, 30, 42, 1, 3, 7, 14, 21, 30, 42, 1, 3, 7, 14, 21, 30, 42, 1, 3, 7, 14, 21, 30, 42, 1, 3, 7, 14, 21, 30, 42]
  labels = [str(a) for a in labels]
  trt_labels, x_trt = ['No carbon', '\nplanktonic', '\nbiofilm', 'PET powder', 'Weathered\nPET powder', 'BHET', 'Amorphous PET'], [5, 13, 21, 29, 37, 45, 17]
  for a in range(len(trt_labels)):
    ax1.text(x_trt[a], -ymax, trt_labels[a], ha='center', va='top')
  plt.xticks(x[1:], labels, fontsize=8)
  plt.xlim([-1, 49])
  ax1.text(0, -0.5, 'Inoculum', ha='center', va='top', rotation=90)
  plt.ylabel('Relative abundance (%)')
  plt.title(function)
  plt.tight_layout()

PETase

get_picrust_stratified_plot('PETase')
plt.show()

tphA2

get_picrust_stratified_plot('tphA2')
plt.show()

tphA3

get_picrust_stratified_plot('tphA3')
plt.show()

tphB

get_picrust_stratified_plot('tphB')
plt.show()

K18074

get_picrust_stratified_plot('K18074')
plt.show()

K18075

get_picrust_stratified_plot('K18075')
plt.show()

K18076

get_picrust_stratified_plot('K18076')
plt.show()

K00448

get_picrust_stratified_plot('K00448')
plt.show()

K00449

get_picrust_stratified_plot('K00449')
plt.show()

Oxygenases in the predicted metagenomes

ko_functions = pd.read_csv('/Users/robynwright/Documents/OneDrive/Papers_writing/PET/Other/copy_github_folder/PICRUSt2/kegg_list.csv', header=0, index_col=0)
ko = list(ko_functions.index.values)
keeping_diox, keeping_mono = [], []
for k in range(len(ko)):
  if isinstance(ko_functions.loc[ko[k], 'Product'], str):
    if 'dioxygenase' in ko_functions.loc[ko[k], 'Product']:
      keeping_diox.append(True)
    else:
      keeping_diox.append(False)
  else:
    keeping_diox.append(False)
  if isinstance(ko_functions.loc[ko[k], 'Product'], str):
    if 'monooxygenase' in ko_functions.loc[ko[k], 'Product']:
      keeping_mono.append(True)
    else:
      keeping_mono.append(False)
  else:
    keeping_mono.append(False)
dioxygenase = ko_functions.loc[keeping_diox, :]
monooxygenase = ko_functions.loc[keeping_mono, :]

Dioxygenases

The dioxygenases plotted here are just a sum of all di/mono-oxygenase genes in all community members. I’ve highlighted 100% as this would be equivalent to each bacterium having 1 gene copy, but obviously they are likely less spread.

handles = []
labels = ['Inoculum', 'No carbon', 'Amorphous PET\nplanktonic', 'Amorphous PET\nbiofilm', 'PET powder', 'Weathered\nPET powder', 'BHET']
x = [0, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48]
alpha = [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
for c in range(len(colors)):
  handles.append(Patch(facecolor=colors[c], edgecolor='k', label=labels[c]))
day, alph = ['Inoculum', 'Day 1', 'Day 3', 'Day 7', 'Day 14', 'Day 21', 'Day 30', 'Day 42'], [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
handles.append(Patch(facecolor='w', edgecolor='w', label=''))
for d in range(len(day)):
  handles.append(Patch(facecolor='k', edgecolor='k', label=day[d], alpha=alph[d]))
  
def oxygenase_plot(ko, picrust, title):
  picrust_using = pd.DataFrame(picrust)
  picrust_ko = list(picrust_using.index.values)
  ko_new, rename = [], {}
  for k in ko:
    if k in picrust_ko:
      ko_new.append(k)
      rename[k] = 'Oxygenase'
  picrust_using = pd.DataFrame(picrust_using.loc[ko_new, :])
  picrust_using = picrust_using.rename(index=rename)
  picrust_using = picrust_using.groupby(by=picrust_using.index, axis=0).sum()
  
  plt.figure(figsize=(10,4))
  ax1 = plt.subplot(111)
  ax1.plot([x[0], x[-1]], [100,100], 'k--')
  count = 0
  for sample in sample_order:
    mean = picrust_using.loc['Oxygenase', sample].mean()
    std = picrust_using.loc['Oxygenase', sample].std()
    for a in range(len(trts)):
      if trts[a] in sample:
        if 'Water' in sample and 'Water' not in trts[a]:
          continue
        color, label = colors[a], labels[a]
    ax1.bar(x[count], mean, yerr=std, color=color, edgecolor='k', alpha=alpha[count], capsize=1)
    count += 1
  ax1.legend(handles=handles, loc='upper left', bbox_to_anchor=(1,1), fontsize=8)
  ax1.set_title(title)
  ax1.set_ylabel('Proportion of population (%)')
  plt.sca(ax1)
  plt.ylim(ymin=0)
  plt.xlim([-1, 49])
  plt.tight_layout()
  return

cols = list(picrust_unstrat_all.columns)
rename = {}
dropping = []
for col in cols:
  for sam in sample_order:
    if sam == col[:-1]:
      rename[col] = sam
    elif 'Inoc' in col:
      rename[col] = 'Day00Inoc'
  if 'Control' in col:
    dropping.append(col)
picrust_unstrat_all = picrust_unstrat_all.rename(columns=rename)
picrust_unstrat_all = picrust_unstrat_all.drop(dropping, axis=1)
oxygenase_plot(list(dioxygenase.index.values), picrust_unstrat_all, 'Dioxygenases')
plt.show()

Monooxygenases

oxygenase_plot(list(monooxygenase.index.values), picrust_unstrat_all, 'Monooxygenases')
plt.show()

Get stratified oxygenases

key_functions = list(dioxygenase.index.values)
functions = [[] for x in key_functions]
count = 0
with open('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/pred_metagenome_contrib.tsv') as f:
    for row in f:
        for fu in range(len(key_functions)):
            if key_functions[fu] in row:
                functions[fu].append(row)

cols = ['sample', 'function', 'taxon', 'taxon_abun', 'taxon_rel_abun', 'genome_function_count', 'taxon_function_abun', 'taxon_rel_function_abun']
count = 0
this_df = []
for func in functions:
    if func == []:
      continue
    this_func = []
    for l in func:
        l = l.split('\t')
        for a in range(len(l)):
          l[a] = l[a].replace('\n', '')
        this_func.append(l)
    new_df = pd.DataFrame(this_func, columns=cols)
    if count == 0:
      this_df = new_df
    else:
      this_df = pd.concat([this_df, new_df])
    count += 1
this_df = this_df.set_index(['sample', 'taxon'])
this_df.drop(['function', 'taxon_abun', 'taxon_rel_abun', 'genome_function_count', 'taxon_rel_function_abun'], axis=1, inplace=True)
this_df = this_df.astype(float)
this_df = this_df.groupby(['sample', 'taxon']).sum()
this_df.to_csv('/Users/robynwright/Documents/OneDrive/Github/PET-Plastisphere/2_community_succession/h_PICRUSt2/picrust_out/ko_all_metagenome_out/dioxygenases.csv')

Dioxygenases stratified

get_picrust_stratified_plot('dioxygenases')
plt.tight_layout()
plt.show()

Monooxygenases stratified

get_picrust_stratified_plot('monooxygenases')
plt.tight_layout()
plt.show()